The Johns Hopkins University provides data on the coronavirus crisis, reporting the daily number of confirmed cases, deaths and recovered patients.
library('readr')
library('tidyverse')
library('ggplot2')
library('plotly')
# Common prefix of the Johns Hopkins CSSE time-series files; the series name
# and the '_global.csv' suffix are appended to it below.
url_base <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_"

# One CSV file is fetched per series.
type <- c('confirmed', 'deaths', 'recovered')

# For each series: download the wide table, tag every row with the series
# name, then reshape so that each non-identifier column becomes a
# (Date, Cases) pair.
dta_list <- lapply(type, function(series) {
  csv_url <- paste0(url_base, series, '_global.csv')
  wide <- read_csv(url(csv_url))
  wide <- rename(wide, Province = `Province/State`, Country = `Country/Region`)
  wide <- mutate(wide, type = series)
  long <- pivot_longer(
    wide,
    -c(Province, Country, Lat, Long, type),
    names_to = 'Date',
    values_to = 'Cases'
  )
  # The former column headers are parsed as month-day-year dates.
  mutate(long, Date = lubridate::mdy(Date))
})

# Stack the per-series tables into a single long data frame.
dta <- do.call(rbind, dta_list)
Data are available from 2020-01-22 to 2020-04-02 for 181 countries.
The visualisation is restricted to the most affected countries. The severity of the pandemic in a country is said to be:
- Low if there are less than 500 confirmed cases
- High if there are at least 501 confirmed cases
- Very High if there are at least $1.0001 \times 10^{4}$ (i.e. 10,001) confirmed cases
# Countries whose largest confirmed count exceeds the first threshold (n1).
country_list <- dta %>%
  filter(type == 'confirmed') %>%
  group_by(country_id) %>%
  summarise(m = max(Cases, na.rm = TRUE)) %>%
  filter(m > n1)

# Same computation against the second threshold (n2).
country_list_vhigh <- dta %>%
  filter(type == 'confirmed') %>%
  group_by(country_id) %>%
  summarise(m = max(Cases, na.rm = TRUE)) %>%
  filter(m > n2)

# Keep only the rows of the countries retained in country_list.
dta <- dta %>%
  filter(country_id %in% country_list$country_id)
# Classify countries into severity levels.
#
# `country` is a vector of country identifiers. Membership is looked up in the
# global tables `country_list` and `country_list_vhigh` (column `country_id`):
# identifiers found in `country_list_vhigh` are 'Very High', those found only
# in `country_list` are 'High', all others are 'Low'.
#
# Returns a factor, the same length as `country`, with levels
# 'Low' < 'High' < 'Very High' (in that order).
severity_class <- function(country) {
  in_high <- country %in% country_list$country_id
  in_very_high <- country %in% country_list_vhigh$country_id

  # Start from the lowest class and overwrite upwards, so that 'Very High'
  # takes precedence over 'High'.
  label <- rep('Low', length(in_high))
  label[in_high] <- 'High'
  label[in_very_high] <- 'Very High'

  factor(label, levels = c('Low', 'High', 'Very High'))
}
# Attach the severity class of each row's country, and make 'germany' the
# reference (first) level of the country_id factor.
dta <- dta %>%
  mutate(
    Severity = severity_class(country_id),
    country_id = relevel(country_id, ref = 'germany')
  )
The sf library, allows to produce map easily.
Mapping the data on a world map requires merging the world shapefile and the Johns Hopkins data, as proposed in the following code. The matching between the two databases is done by creating a country_id identifier (the country name in lower case). The few mismatches between the two databases are identified and corrected.
library(sf)
# Read the world-borders shapefile.
world_map <- st_read(dsn = 'datasets/shape_dir/TM_WORLD_BORDERS-0.3.shp')
# Add a lower-case country key built from the NAME attribute; it is matched
# against the country_id of the case data.
world_map <- mutate(st_as_sf(world_map), country_id = tolower(NAME))
## Reading layer `TM_WORLD_BORDERS-0.3' from data source `/home/metienne/git/MarieEtienne.github.io/datasets/shape_dir/TM_WORLD_BORDERS-0.3.shp' using driver `ESRI Shapefile'
## Simple feature collection with 246 features and 11 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -180 ymin: -90 xmax: 180 ymax: 83.6236
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
# Countries of the case data whose key has no counterpart in the shapefile.
country_issues <- filter(
  country_list,
  !(country_id %in% world_map$country_id)
)
# Print the unmatched keys.
country_issues$country_id
## [1] czechia diamond princess iran korea, south
## [5] moldova us
## 181 Levels: afghanistan albania algeria andorra ... zimbabwe
# Harmonise the shapefile country keys with those of the case data, then flag
# the countries for which case data are available.
#
# The keys must be remapped BEFORE computing `avail`: otherwise the renamed
# countries (us, iran, "korea, south", czechia) are looked up under their
# shapefile names and are wrongly flagged as unavailable.
#
# NOTE(review): 'moldova' and 'diamond princess' are also reported as
# unmatched above but are not remapped here; they stay unavailable on the map.
world_map <- world_map %>%
  mutate(
    country_id = case_when(
      country_id == "united states" ~ 'us',
      country_id == "iran (islamic republic of)" ~ 'iran',
      country_id == "korea, republic of" ~ "korea, south",
      country_id == "czech republic" ~ "czechia",
      TRUE ~ country_id
    ),
    # `%in%` already yields TRUE/FALSE (never NA), so no ifelse() is needed.
    avail = country_id %in% country_list$country_id
  )
Finally the three levels of severity are presented in the map below